In [1]:
import scanpy as sc
import pandas as pd
from matplotlib import rcParams
In [2]:
sc.set_figure_params(dpi=80, color_map='viridis')
sc.settings.verbosity = 2
sc.logging.print_versions()
scanpy==1.5.1 anndata==0.7.3 umap==0.4.3 numpy==1.18.1 scipy==1.4.1 pandas==1.0.1 scikit-learn==0.22.1 statsmodels==0.11.0 python-igraph==0.7.1 louvain==0.7.0 leidenalg==0.8.0
In [3]:
pbmc = sc.datasets.pbmc68k_reduced()
In [4]:
rcParams['figure.figsize'] = 4, 4
sc.pl.umap(pbmc, color=['bulk_labels'], s=50)
In [5]:
marker_genes = ['CD79A', 'MS4A1', 'CD8A', 'CD8B', 'LYZ',  'LGALS3', 'S100A8', 'GNLY', 'NKG7', 'KLRB1',
                'FCGR3A', 'FCER1A', 'CST3']

Stacked violins

Plot marker genes per cluster using stacked violin plots.

In [6]:
ax = sc.pl.stacked_violin(pbmc, marker_genes, groupby='bulk_labels',
                         var_group_positions=[(7, 8)], var_group_labels=['NK'])
In [7]:
# Same as before but swapping the axes and with dendrogram (notice that the categories are reordered).

ax = sc.pl.stacked_violin(pbmc, marker_genes, groupby='bulk_labels', swap_axes=True,
                         var_group_positions=[(7, 8)], var_group_labels=['NK'], dendrogram=True)
WARNING: dendrogram data not found (using key=dendrogram_bulk_labels). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
    using 'X_pca' with n_pcs = 50
Storing dendrogram info using `.uns['dendrogram_bulk_labels']`
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: CD4+/CD25 T Reg, CD4+/CD45RA+/CD25- Naive T, CD4+/CD45RO+ Memory, etc.
var_group_labels: NK
In [8]:
## Dot plots
marker_genes_dict = {'B-cell': ['CD79A', 'MS4A1'],
                     'T-cell': 'CD3D',
                     'T-cell CD8+': ['CD8A', 'CD8B'],
                     'NK': ['GNLY', 'NKG7'],
                     'Myeloid': ['CST3', 'LYZ'],
                     'Monocytes': ['FCGR3A'],
                     'Dendritic': ['FCER1A']}

# use marker genes as dict to group them
ax = sc.pl.dotplot(pbmc, marker_genes_dict, groupby='bulk_labels')
In [9]:
ax = sc.pl.dotplot(pbmc,
                   marker_genes,
                   groupby='bulk_labels',
                   dendrogram=True,
                   dot_max=0.5,
                   dot_min=0.3,
                   standard_scale='var')
In [10]:
ax = sc.pl.dotplot(pbmc,
                   marker_genes_dict,
                   groupby='bulk_labels',
                   dendrogram=True,
                   standard_scale='var',
                   smallest_dot=40,
                   color_map='Blues',
                   figsize=(8,5))
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: CD4+/CD25 T Reg, CD4+/CD45RA+/CD25- Naive T, CD4+/CD45RO+ Memory, etc.
var_group_labels: B-cell, T-cell, T-cell CD8+, etc.
In [11]:
ax = sc.pl.dotplot(pbmc, marker_genes, groupby='louvain',
              var_group_positions=[(0,1), (11, 12)],
              var_group_labels=['B cells', 'dendritic'],
              figsize=(12,4), var_group_rotation=0, dendrogram='dendrogram_louvain')
WARNING: dendrogram data not found (using key=dendrogram_louvain). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
    using 'X_pca' with n_pcs = 50
Storing dendrogram info using `.uns['dendrogram_louvain']`
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: B cells, dendritic

Matrix plots

The matrixplot shows the mean expression of a gene in a group by category as a heatmap. In contrast to dotplot, the matrix plot can be used with corrected and/or scaled counts. By default raw counts are used.

In [12]:
gs = sc.pl.matrixplot(pbmc, marker_genes_dict, groupby='bulk_labels')
In [13]:
gs = sc.pl.matrixplot(pbmc,
                      marker_genes_dict,
                      groupby='bulk_labels',
                      dendrogram=True,
                      standard_scale='var')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: CD4+/CD25 T Reg, CD4+/CD45RA+/CD25- Naive T, CD4+/CD45RO+ Memory, etc.
var_group_labels: B-cell, T-cell, T-cell CD8+, etc.
In [14]:
# to use the 'non-raw' data we select marker genes present in this data.
marker_genes_2 = [x for x in marker_genes if x in pbmc.var_names]
In [15]:
gs = sc.pl.matrixplot(pbmc, marker_genes_2, groupby='bulk_labels', dendrogram=True,
                      use_raw=False, vmin=-3, vmax=3, cmap='bwr',  swap_axes=True, figsize=(5,6))

Heatmaps

In [16]:
ax = sc.pl.heatmap(pbmc,marker_genes_dict, groupby='louvain')
In [17]:
ax = sc.pl.heatmap(pbmc, marker_genes, groupby='louvain', figsize=(5, 8),
              var_group_positions=[(0,1), (11, 12)], use_raw=False, vmin=-3, vmax=3, cmap='bwr',
              var_group_labels=['B cells', 'dendritic'], var_group_rotation=0, dendrogram='dendrogram_louvain')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: B cells, dendritic
In [18]:
# Track plot data is better visualized using the non-log counts
import numpy as np
ad = pbmc.copy()
ad.raw.X.data = np.exp(ad.raw.X.data)
In [19]:
ax = sc.pl.tracksplot(ad, marker_genes, groupby='louvain')

Visualization of marker genes

In [21]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
sc.tl.rank_genes_groups(pbmc, groupby='bulk_labels', method='logreg')
ranking genes
    finished (0:00:00)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
In [22]:
rcParams['figure.figsize'] = 4,4
rcParams['axes.grid'] = True
sc.pl.rank_genes_groups(pbmc)
In [23]:
sc.pl.rank_genes_groups_dotplot(pbmc, n_genes=4)
In [24]:
axs = sc.pl.rank_genes_groups_dotplot(pbmc, n_genes=15, groups=['Dendritic', 'CD19+ B'])
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: CD4+/CD25 T Reg, CD4+/CD45RA+/CD25- Naive T, CD4+/CD45RO+ Memory, etc.
var_group_labels: Dendritic, CD19+ B
In [25]:
axs = sc.pl.rank_genes_groups_dotplot(pbmc, groupby='louvain', n_genes=4, dendrogram='dendrogram_louvain')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: CD4+/CD25 T Reg, CD4+/CD45RA+/CD25- Naive T, CD4+/CD45RO+ Memory, etc.
In [26]:
axs = sc.pl.rank_genes_groups_matrixplot(pbmc, n_genes=3, standard_scale='var', cmap='Blues')
In [27]:
axs = sc.pl.rank_genes_groups_matrixplot(pbmc, n_genes=3, use_raw=False, vmin=-3, vmax=3, cmap='bwr')
In [28]:
# instead of pbmc we use the 'ad' object (created earlier) in which the raw matrix is exp(pbmc.raw.matrix). This
# highlights better the differences between the markers.
sc.pl.rank_genes_groups_stacked_violin(ad, n_genes=3)
In [29]:
# setting row_palette='slateblue' makes all violin plots of the same color
sc.pl.rank_genes_groups_stacked_violin(ad, n_genes=3, row_palette='slateblue')
In [30]:
# width is used to set the violin plot width. Here, after setting figsize wider than default,
# the `width` arguments helps to keep the violin plots thin.
sc.pl.rank_genes_groups_stacked_violin(ad, n_genes=3, swap_axes=True, figsize=(6, 10), width=0.4)
In [31]:
sc.pl.rank_genes_groups_heatmap(pbmc, n_genes=3, standard_scale='var')
In [32]:
sc.pl.rank_genes_groups_heatmap(pbmc, n_genes=3, use_raw=False, swap_axes=True, vmin=-3, vmax=3, cmap='bwr')
In [33]:
sc.pl.rank_genes_groups_heatmap(pbmc, n_genes=10, use_raw=False, swap_axes=True, show_gene_labels=False,
                                vmin=-3, vmax=3, cmap='bwr')
In [34]:
sc.pl.rank_genes_groups_tracksplot(ad, n_genes=3)
In [35]:
# compute hiearchical clustering based on the
# given `var_names` from the raw matrix
sc.tl.dendrogram(pbmc, 'bulk_labels', n_pcs=30)
    using 'X_pca' with n_pcs = 30
Storing dendrogram info using `.uns['dendrogram_bulk_labels']`
In [ ]: